{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "reader = tf.train.NewCheckpointReader('t2t-small/model.ckpt-500000')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "transformer/symbol_modality_32128_512/target_emb/weights_0\n",
      "transformer/symbol_modality_32128_512/input_emb/weights_0\n",
      "transformer/body/encoder/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_5/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/encoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_5/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_5/ffn/conv2/bias\n",
      "transformer/body/encoder/layer_5/ffn/conv1/bias\n",
      "transformer/body/encoder/layer_4/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/encoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_4/ffn/conv2/bias\n",
      "transformer/body/encoder/layer_4/ffn/conv1/kernel\n",
      "transformer/body/encoder/layer_4/ffn/conv1/bias\n",
      "transformer/body/encoder/layer_3/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/encoder/layer_3/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_3/ffn/conv1/kernel\n",
      "transformer/body/encoder/layer_3/ffn/conv1/bias\n",
      "transformer/body/encoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_2/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_1/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/encoder/layer_1/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/encoder/layer_1/self_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/encoder/layer_1/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/encoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_1/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_1/ffn/conv2/bias\n",
      "transformer/body/encoder/layer_1/ffn/conv1/kernel\n",
      "transformer/body/encoder/layer_1/ffn/conv1/bias\n",
      "transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_0/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_0/ffn/conv1/bias\n",
      "transformer/body/decoder/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_5/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_5/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_5/ffn/conv2/kernel\n",
      "transformer/body/decoder/layer_5/ffn/conv1/kernel\n",
      "transformer/body/decoder/layer_5/encdec_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_5/encdec_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_4/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/encoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_4/self_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_4/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_4/ffn/conv1/kernel\n",
      "transformer/body/decoder/layer_4/ffn/conv1/bias\n",
      "transformer/body/encoder/layer_0/self_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_5/encdec_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_4/encdec_attention/multihead_attention/v/kernel\n",
      "transformer/body/decoder/layer_4/encdec_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_4/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/encoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_4/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_3/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_3/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_3/ffn/conv2/kernel\n",
      "transformer/body/decoder/layer_5/ffn/conv1/bias\n",
      "transformer/body/decoder/layer_3/ffn/conv2/bias\n",
      "transformer/body/decoder/layer_3/ffn/conv1/bias\n",
      "transformer/body/decoder/layer_3/encdec_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_3/encdec_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_3/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/decoder/layer_2/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/decoder/layer_2/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_2/self_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_2/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_2/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_2/ffn/conv1/kernel\n",
      "training/transformer/body/decoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/decoder/layer_0/encdec_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_0/self_attention/multihead_attention/k/kernel\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_2/ffn/conv2/kernel\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/ffn/conv1/bias/Adafactor\n",
      "transformer/body/decoder/layer_2/encdec_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_4/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/encoder/layer_2/ffn/conv1/kernel\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_2/ffn/conv2/bias\n",
      "training/transformer/body/decoder/layer_5/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_2/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_0/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/encoder/layer_5/self_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/decoder/layer_5/ffn/conv2/bias\n",
      "training/transformer/body/decoder/layer_4/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_3/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_4/encdec_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "losses_avg/problem_0/training_loss\n",
      "transformer/body/encoder/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/encdec_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/decoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_2/ffn/conv1/bias/Adafactor\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training/transformer/body/decoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "transformer/body/encoder/layer_4/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/encoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/symbol_modality_32128_512/input_emb/weights_0/Adafactor\n",
      "training/transformer/body/decoder/layer_3/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/decoder/layer_3/encdec_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_5/ffn/conv1/bias/Adafactor\n",
      "transformer/body/encoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/encoder/layer_0/ffn/conv2/bias\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "transformer/body/decoder/layer_0/ffn/conv1/bias\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/encoder/layer_0/ffn/conv1/kernel\n",
      "training/transformer/body/decoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/encoder/layer_3/ffn/conv2/bias\n",
      "training/transformer/body/decoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_5/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/symbol_modality_32128_512/softmax/weights_0\n",
      "training/transformer/body/decoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/encoder/layer_3/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_1/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_3/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/encoder/layer_5/self_attention/multihead_attention/k/kernel\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_5/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/encoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_0/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_1/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/target_space_embedding/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_2/ffn/conv2/bias\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "transformer/body/decoder/layer_5/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_0/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_0/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_2/ffn/conv1/bias/Adafactor\n",
      "global_step\n",
      "training/transformer/body/decoder/layer_2/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_3/ffn/conv1/kernel\n",
      "training/transformer/body/decoder/layer_0/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "train_stats/problem_0_steps\n",
      "transformer/body/encoder/layer_4/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_3/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_4/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/decoder/layer_3/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/decoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/decoder/layer_3/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/encoder/layer_2/self_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/decoder/layer_0/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_3/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_5/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/encoder/layer_0/self_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/decoder/layer_2/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "losses_avg/problem_0/total_loss\n",
      "transformer/body/decoder/layer_4/ffn/conv2/kernel\n",
      "training/transformer/body/decoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_4/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "losses_avg/problem_0/extra_loss\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_2/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/target_space_embedding/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/encoder/layer_4/ffn/conv2/kernel\n",
      "training/transformer/body/decoder/layer_5/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_1/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_4/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_2/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_2/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "transformer/body/decoder/layer_5/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_1/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_0/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_2/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_2/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/target_space_embedding/kernel\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_4/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_1/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_5/self_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/encoder/layer_2/ffn/conv1/bias\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_0/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_0/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_0/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_0/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_0/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_3/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/encoder/layer_2/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "transformer/body/decoder/layer_3/encdec_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_1/encdec_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/encoder/layer_1/ffn/conv1/bias/Adafactor\n",
      "training/transformer/body/decoder/layer_3/encdec_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_1/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/decoder/layer_5/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_5/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_3/ffn/conv1/kernel/Adafactor\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/v/kernel/Adafactor\n",
      "transformer/body/encoder/layer_5/ffn/conv1/kernel\n",
      "training/transformer/symbol_modality_32128_512/softmax/weights_0/Adafactor_1\n",
      "transformer/body/decoder/layer_1/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "training/transformer/body/encoder/layer_3/ffn/conv1/bias/Adafactor\n",
      "transformer/body/encoder/layer_2/self_attention/multihead_attention/v/kernel\n",
      "transformer/body/decoder/layer_5/encdec_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/encoder/layer_2/ffn/conv2/bias/Adafactor\n",
      "training/transformer/body/encoder/layer_2/ffn/conv2/kernel/Adafactor\n",
      "transformer/body/encoder/layer_0/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/encoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/encoder/layer_0/self_attention/multihead_attention/k/kernel\n",
      "training/transformer/body/encoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/ffn/conv1/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_0/encdec_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/encoder/layer_3/ffn/conv2/kernel/Adafactor\n",
      "transformer/body/encoder/layer_2/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/encoder/layer_3/ffn/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_3/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/k/kernel/Adafactor\n",
      "transformer/body/decoder/layer_0/ffn/conv2/bias\n",
      "transformer/body/decoder/layer_2/ffn/conv1/bias\n",
      "training/transformer/body/encoder/layer_0/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "transformer/body/encoder/layer_2/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/encoder/layer_3/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_0/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/ffn/conv1/kernel/Adafactor_1\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_1/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/ffn/conv2/bias/Adafactor\n",
      "transformer/body/decoder/layer_2/encdec_attention/multihead_attention/k/kernel\n",
      "training/transformer/body/encoder/layer_4/ffn/conv2/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/decoder/layer_2/encdec_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "training/transformer/body/decoder/layer_4/encdec_attention/multihead_attention/output_transform/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_2/encdec_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/encoder/layer_4/ffn/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/q/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_4/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_5/ffn/conv1/kernel/Adafactor\n",
      "transformer/body/decoder/layer_0/encdec_attention/multihead_attention/output_transform/kernel\n",
      "transformer/body/decoder/layer_2/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/body/decoder/layer_3/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/k/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_5/ffn/conv2/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "training/transformer/body/decoder/layer_5/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "training/transformer/body/encoder/layer_5/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/body/encoder/layer_prepostprocess/layer_norm/layer_norm_scale/Adafactor\n",
      "transformer/body/encoder/layer_2/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "training/transformer/symbol_modality_32128_512/input_emb/weights_0/Adafactor_1\n",
      "training/transformer/symbol_modality_32128_512/softmax/weights_0/Adafactor\n",
      "training/transformer/body/decoder/layer_4/self_attention/multihead_attention/q/kernel/Adafactor_1\n",
      "training/transformer/symbol_modality_32128_512/target_emb/weights_0/Adafactor\n",
      "transformer/body/encoder/layer_3/self_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/decoder/layer_5/encdec_attention/multihead_attention/v/kernel/Adafactor\n",
      "training/transformer/symbol_modality_32128_512/target_emb/weights_0/Adafactor_1\n",
      "transformer/body/decoder/layer_0/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_0/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_0/encdec_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_0/ffn/conv1/kernel\n",
      "training/transformer/body/encoder/layer_2/self_attention/multihead_attention/output_transform/kernel/Adafactor\n",
      "transformer/body/decoder/layer_0/ffn/conv2/kernel\n",
      "transformer/body/encoder/layer_3/self_attention/multihead_attention/q/kernel\n",
      "transformer/body/decoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_0/ffn/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_0/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_0/self_attention/multihead_attention/v/kernel\n",
      "training/transformer/body/decoder/layer_4/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias/Adafactor\n",
      "transformer/body/decoder/layer_1/encdec_attention/multihead_attention/k/kernel\n",
      "training/transformer/body/decoder/layer_3/ffn/conv1/kernel/Adafactor\n",
      "training/transformer/body/encoder/layer_5/ffn/conv1/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/encdec_attention/multihead_attention/output_transform/kernel\n",
      "training/transformer/body/encoder/layer_1/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/ffn/conv1/bias\n",
      "transformer/body/decoder/layer_1/ffn/conv1/kernel\n",
      "transformer/body/decoder/layer_1/ffn/conv2/bias\n",
      "training/transformer/body/decoder/layer_3/self_attention/multihead_attention/v/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_1/ffn/conv2/kernel\n",
      "transformer/body/decoder/layer_4/encdec_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_1/ffn/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_4/ffn/conv2/bias\n",
      "transformer/body/decoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_bias\n",
      "transformer/body/decoder/layer_1/self_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n",
      "transformer/body/decoder/layer_1/self_attention/multihead_attention/k/kernel\n",
      "transformer/body/decoder/layer_1/self_attention/multihead_attention/q/kernel\n",
      "training/transformer/body/encoder/layer_0/ffn/conv2/kernel/Adafactor_1\n",
      "transformer/body/decoder/layer_2/encdec_attention/layer_prepostprocess/layer_norm/layer_norm_scale\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "transformer/body/decoder/layer_2/encdec_attention/multihead_attention/output_transform/kernel\n"
     ]
    }
   ],
   "source": [
    "# Ordered (old, new) substring rewrites applied to every checkpoint variable name.\n",
    "# NOTE(review): 'targets/' is inserted after the modality scope for target_emb but\n",
    "# before it for softmax -- confirm both match the destination model's variable names.\n",
    "rename_rules = (\n",
    "    ('transformer', 'transformer_tag'),\n",
    "    ('symbol_modality_32128_512/target_emb', 'symbol_modality_32128_512/targets/target_emb'),\n",
    "    ('symbol_modality_32128_512/softmax', 'targets/symbol_modality_32128_512/softmax'),\n",
    ")\n",
    "\n",
    "# Maps each renamed variable to a fresh tf.Variable initialised from the tensor\n",
    "# stored under the old name in the source checkpoint.\n",
    "new_checkpoint_vars = {}\n",
    "\n",
    "for old_name in reader.get_variable_to_shape_map():\n",
    "    print(old_name)\n",
    "    new_name = old_name\n",
    "    # Rules are applied in sequence, each one to the result of the previous.\n",
    "    for old_part, new_part in rename_rules:\n",
    "        new_name = new_name.replace(old_part, new_part)\n",
    "    new_checkpoint_vars[new_name] = tf.Variable(reader.get_tensor(old_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the renamed variables out as a new checkpoint.\n",
    "checkpoint_path = 'small-tatabahasa/model.ckpt'\n",
    "\n",
    "# tf.train.Saver.save raises if the parent directory is missing, so create it\n",
    "# up front; exist_ok keeps re-running this cell harmless.\n",
    "os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)\n",
    "\n",
    "init = tf.global_variables_initializer()\n",
    "# Passing a dict makes the saver store each variable under its dict key,\n",
    "# i.e. under the renamed variable name.\n",
    "saver = tf.train.Saver(new_checkpoint_vars)\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    # Running the initializer assigns every variable its initial value before saving.\n",
    "    sess.run(init)\n",
    "    saver.save(sess, checkpoint_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
